{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from collections import Counter\n",
    "import paddlehub as hub\n",
    "import paddle\n",
    "from sklearn.model_selection import train_test_split\n",
    "from paddlehub.datasets.base_nlp_dataset import TextClassificationDataset\n",
    "\n",
    "# Load the raw spreadsheet and take a first look at its rows and dtypes.\n",
    "# display() is supplied by the IPython kernel; it renders every intermediate\n",
    "# result, whereas a bare expression is only shown when it ends the cell.\n",
    "df = pd.read_excel('moods_classify8_unprocessed.xlsx')\n",
    "display(df.head(10))\n",
    "df.info()\n",
    "\n",
    "# Missing values: report which columns contain nulls and list each affected\n",
    "# row once, then drop the rows that lack either of the two required fields.\n",
    "display(df.isnull().any())\n",
    "display(df[df.isnull().any(axis=1)])\n",
    "df = df.dropna(subset=['text', 'label'])\n",
    "display(df.isnull().any())\n",
    "assert not df[['text', 'label']].isnull().any().any(), 'text/label still contain nulls'\n",
    "\n",
    "# Duplicates: show the repeated texts, then keep only the first occurrence.\n",
    "display(df[df.duplicated('text')])\n",
    "df = df.drop_duplicates(subset='text', keep='first')\n",
    "assert not df.duplicated('text').any(), 'duplicate texts remain after de-duplication'\n",
    "\n",
    "# Box plot of the label column, drawn on an explicit figure/axes pair.\n",
    "fig, ax = plt.subplots()\n",
    "ax.boxplot(x=df['label'],\n",
    "           whis=1.5,\n",
    "           widths=0.8,\n",
    "           patch_artist=True,\n",
    "           showmeans=True,\n",
    "           boxprops={'facecolor':'steelblue'},\n",
    "           flierprops={'marker':'D','markerfacecolor':'black','markersize':4},\n",
    "           medianprops={'linestyle':'--','color':'orange'},\n",
    "           labels=[' '])\n",
    "ax.set_title('Box plot of label')\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
